***************
* Title: gambia_ecd_edcc_figure4.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: 	ECD_3to6_Gambia_cleanv1.dta
* Outputs: 	gambia_ecd_edcc_figure4.txt (log; written only if the log using command below is uncommented)
*			gambia_ecd_edcc_figure4_gc[1-2].[wmf/eps]
* Notes: creates Figure 4 in the paper
****************
#delimit;
* Session setup: note the start time, then clear whatever is left in memory;
local start "$S_TIME";
clear;
clear matrix;
clear mata;
graph drop _all;
capture log close;
set more off;
/*Working directory: set it before running, e.g.
	cd mydir
*/
* relative folders for the input data and for the exported output;
local data "Data\cleaned";
local output "analysis\output";
*log using analysis\output\gambia_ecd_edcc_figure4.txt, text replace;

* LOAD AND PREPARE DATA;
* the sample definition follows gambia_ecd_attrition1.do;
quietly use `data'\ECD_3to6_Gambia_cleanv1, clear;
/*Treatment status of settlement 37028 still needs to be checked, so it is left out for now*/
quietly drop if settlement_code==37028;

/*Survey-status flags, each coded 0/1:
	in_baseline      in baseline (ip22 is neither 3 nor missing)
	in_endline       in endline (interview_result equals 1)
	in_endline_old   in endline and in the original sample
	in_endline_new   in endline and newly sampled*/
quietly generate in_baseline=(ip22!=3 & ip22!=.);
quietly generate in_endline=(interview_result==1);
quietly generate in_endline_old=(in_baseline==1 & in_endline==1);
quietly generate in_endline_new=(in_baseline==0 & in_endline==1);
/*children with an unresolved gender mismatch are counted as new to endline,
	and their original-sample flag is blanked out together with all other new children*/
quietly replace in_endline_new=1 if child_gender_mismatch_resolved==0 & in_endline_new==0;
quietly replace in_endline_old=. if in_endline_new==1;

/*keep eligibles*/
* eligible if aged 36 to 83 months at baseline, or absent from baseline with no baseline age recorded;
local base_elig "(in_baseline==1 & child_age_mths_dob>=36 & child_age_mths_dob<84)";
local new_elig "(in_baseline==0 & child_age_mths_dob==.)";
count;
keep if `base_elig'|`new_elig';

* new-to-endline children must be 48 to 96 months old at endline (a wide window for 3-6 at baseline,
	allowing for age errors) unless they are unresolved gender mismatches;
keep if in_endline_new==0|
	(in_endline_new==1 & (inrange(selected_child_age,48,96)|child_gender_mismatch_resolved==0));

* 0/1 indicators for each treatment arm, coded from the treatment variable;
quietly generate communitybased=(treatment==6);
quietly generate purecontrol=(treatment==1);
quietly generate ECDAnnex_control=(treatment==4);
quietly generate ECDAnnex_treated=(treatment==5);

* re-standardize endline MDAT scores against the pooled control groups;
/*Each score is regressed on age, age squared and gender within the two control arms
	(same adjustment as gambia_ecd_clean1.do). Residuals are predicted for every child
	and then scaled by the control-group residual mean and standard deviation*/
local controls "purecontrol==1|ECDAnnex_control==1";
drop zfinemotor_adj_endline zlanghear_adj_endline;
foreach domain in finemotor langhear {;
	quietly xi: regress `domain'_endline selected_child_age selected_child_age2 child_female
		if `controls';
	quietly predict e, residuals;
	quietly summarize e if `controls';
	quietly generate z`domain'_adj_endline=(e-r(mean))/r(sd);
	drop e;
};
label variable zfinemotor_adj_endline "MDAT fine motor endline z-score (age-adjusted)";
label variable zlanghear_adj_endline "MDAT language & hearing endline z-score (age-adjusted)";

* sample sizes (# of children and project sites);
/*The contents() form of the table command used below is the syntax from before Stata 17.
	From Stata 17 onward it only runs under version control, so the commands are
	prefixed with version 16 there. On earlier releases the prefix stays empty and
	the commands run exactly as written*/
local vc "";
if c(stata_version)>=17 {;
	local vc "version 16:";
};
/*tag() flags one observation per settlement_code, so rawsum of site counts settlements*/
qui egen site=tag(settlement_code);
`vc' table treatment, c(freq rawsum site);

* get sample sizes by baseline/endline (old/new) and treatment status;
`vc' table treatment, c(rawsum in_baseline rawsum in_endline_old rawsum in_endline_new);

* DISTRIBUTIONS OF BASELINE/ENDLINE OUTCOMES;
* combine graphs: 2x2 works best for densities;
/*Kernel densities of the adjusted MDAT z-scores, control (solid black) against
	treated (dashed gray). Panels are stored undrawn as p1 to p8 in this order:
	  wave:        baseline (p1-p4), then endline (p5-p8)
	  experiment:  ECD Annex (treatment 4 vs 5), then community-based (treatment 1 vs 6)
	  domain:      fine motor, then language and hearing
	Only the baseline panels carry a domain title*/
local g=1;
foreach wave in baseline endline {;
	foreach experiment in annex community {;
		/*treatment codes and legend wording for this experiment*/
		if "`experiment'"=="annex" {;
			local ctl=4;
			local trt=5;
			local arm "ECD Annex";
		};
		else {;
			local ctl=1;
			local trt=6;
			local arm "community-based";
		};
		foreach domain in finemotor langhear {;
			if "`domain'"=="finemotor" {;
				local domlab "fine motor";
			};
			else {;
				local domlab "language and hearing";
			};
			/*title option is left empty for endline panels*/
			local heading "";
			if "`wave'"=="baseline" {;
				local heading `"title("`domlab'")"';
			};
			twoway (kdensity z`domain'_adj_`wave' if treatment==`ctl', lpattern(l) lcolor(black) lwidth(medthick))
				(kdensity z`domain'_adj_`wave' if treatment==`trt', lpattern(_) lcolor(gray) lwidth(medthick)),
				`heading' ytitle("density") xtitle("z-score (`wave')")
				legend(label(1 "`arm' control") label(2 "`arm' treated") rows(1))
				nodraw name(p`g');
			local g=`g'+1;
		};
	};
};

/*2x2 graphs*/
/*Each figure stacks the baseline panels (top row) over the endline panels (bottom row)
	for one experiment, sharing a single legend. grc1leg is not an official Stata
	command and has to be installed before this section runs.
	graph export writes the graph created by the preceding grc1leg call*/
grc1leg p1 p2 p5 p6, xcommon title("MDAT scores") subtitle("ECD Annex experiment") rows(2) cols(2) 
	note("Scores adjusted by age, age squared, and gender.") name(gc1);
qui graph export `output'\gambia_ecd_edcc_figure4_gc1.wmf, as(wmf) replace;
qui graph export `output'\gambia_ecd_edcc_figure4_gc1.eps, as(eps) replace;
grc1leg p3 p4 p7 p8, xcommon title("MDAT scores") subtitle("Community-based ECD experiment") rows(2) cols(2) 
	note("Scores adjusted by age, age squared, and gender.") name(gc2);
qui graph export `output'\gambia_ecd_edcc_figure4_gc2.wmf, as(wmf) replace;
qui graph export `output'\gambia_ecd_edcc_figure4_gc2.eps, as(eps) replace;

* report start and end times of the run;
local end=`"$S_TIME"'; 
di "`start'";
di "`end'";
/*capture keeps the script from ending in an error when no log is open,
	which is the case whenever the log using command is commented out*/
cap log close;
